The Moran game simulator

One simulation of the MGP

burnin <- 0
tout <- seq(0,400,by=1)
playMoran(n=100,mu=100,times=tout,t0=-burnin,sample=FALSE,tree=TRUE) -> x
registerDoParallel()
x %>% plot(root_time=NA) -> pls

png_path <- file.path(tempdir(),"frame%05d.png")
png(png_path,height=3.7,width=6,res=100,units="in")
for (i in seq_along(pls)) print(pls[[i]])
dev.off()

library(gifski)
png_files <- sprintf(png_path,seq_along(pls))
gif_file <- "mgp1.gif"
gifski(png_files,gif_file,delay=0.02,loop=TRUE)
unlink(png_files)

One simulation of the SMGP

burnin <- 0
tout <- seq(0,400,by=1)
playMoran(n=100,mu=100,times=tout,t0=-burnin,sample=TRUE,tree=TRUE) -> x
registerDoParallel()
x %>% plot(root_time=NA) -> pls

png_path <- file.path(tempdir(),"frame%05d.png")
png(png_path,height=3.7,width=6,res=100,units="in")
for (i in seq_along(pls)) print(pls[[i]])
dev.off()

library(gifski)
png_files <- sprintf(png_path,seq_along(pls))
gif_file <- "mgp2.gif"
gifski(png_files,gif_file,delay=0.02,loop=TRUE)
unlink(png_files)

Simulations of the Observed SMGP

burnin <- 0
tout <- seq(0,400,by=1)
playMoran(n=100,mu=100,times=tout,t0=-burnin,sample=TRUE,tree=TRUE) -> x
registerDoParallel()
x %>% plot() -> pls

png_path <- file.path(tempdir(),"frame%05d.png")
png(png_path,height=3.7,width=6,res=100,units="in")
for (i in seq_along(pls)) print(pls[[i]])
dev.off()

library(gifski)
png_files <- sprintf(png_path,seq_along(pls))
gif_file <- "mgp3.gif"
gifski(png_files,gif_file,delay=0.02,loop=TRUE)
unlink(png_files)

Observed SMGP with more detail

In the following animation depicting one simulation of the SMGP, colored points are drawn to indicate players colored according to the scheme in the text. Red points correspond to “live samples” and “red players”: each one holds one red and one blue ball. Blue points correspond to “dead samples” and “blue players”: each one holds one blue and one green ball. These are samples that are the direct ancestor of another sample. Green points (“green players”) represent branching points: each green player holds two green balls.

burnin <- 0
tout <- seq(0,100,by=1)
playMoran(n=10,mu=10,times=tout,t0=-burnin,sample=TRUE,tree=TRUE,ill=TRUE) -> x
registerDoParallel()
x %>% plot(points=TRUE,diagram=TRUE) -> pls

png_path <- file.path(tempdir(),"frame%05d.png")
png(png_path,height=3.7,width=6,res=100,units="in")
for (i in seq_along(pls)) print(pls[[i]])
dev.off()

library(gifski)
png_files <- sprintf(png_path,seq_along(pls))
gif_file <- "mgp4.gif"
gifski(png_files,gif_file,delay=0.05,loop=TRUE)
unlink(png_files)

Verifying the SMGP formulae

Now we investigate the distributions of the cumulative hazards of the conditional attachment process (main text, Theorem 9). These should be perfectly distributed according to the standard exponential distribution.

pomp::bake(file="mgp_hazards.rds",{
  registerDoParallel()
  registerDoRNG()
  
  nrep <- 2000
  expand.grid(
    rep=seq_len(nrep),
    popsize=c(10,100,1000),
    relsr=c(0.1,1,10),
    nsamples=21
  ) -> design
  
  foreach (i=iter(design,"row"),
           .options.multicore=list(preschedule=FALSE)) %dopar%
    {
      samplerate <- i$relsr * i$popsize
      tout <- cumsum(rexp(n=i$nsamples,rate=samplerate))
      system.time({
        playMoran(
          n=i$popsize,
          mu=i$popsize,
          times=tout,
          t0=0,
          tree=FALSE
        ) %>%
          getInfo(prune=FALSE) %>%
          getElement("cumhaz") %>%
          bind_cols(i) %>%
          rowid_to_column("sample") -> x
      }) -> tm
      x$etime <- tm[3]
      x
    } %>%
    bind_rows() %>%
    mutate(
      p=exp(-Lambda)
    ) -> dat
  
  attr(dat,"cores") <- getDoParWorkers()
  
  dat
  
}) -> dat

The above required 7.7 min on 20 cores. Efficiency: 0.712.

We use Kolmogorov-Smirnov to test the hypothesis that the cumulative hazards are distributed as expected.

dat %>%
  ggplot(aes(x=p))+
  geom_abline(slope=1)+
  stat_ecdf()+
  facet_wrap(~sample)+
  coord_equal()+
  labs(x=expression(italic(p)),y=expression(italic(F(p))))+
  expand_limits(x=c(0,1),y=c(0,1))

dat %>%
  mutate(ratio=factor(samplerate/popsize)) %>%
  ggplot(aes(x=p))+
  geom_abline(slope=1)+
  stat_ecdf()+
  facet_grid(popsize~ratio,labeller=label_both)+
  coord_equal()+
  labs(x=expression(italic(p)),y=expression(italic(F(p))))+
  expand_limits(x=c(0,1),y=c(0,1))
## Error: Problem with `mutate()` input `ratio`.
## ✖ object 'samplerate' not found
## ℹ Input `ratio` is `factor(samplerate/popsize)`.
library(broom)

dat %>%
  filter(p>0) %>%
  group_by(sample) %>%
  do(tidy(ks.test(x=.$Lambda,y=pexp,rate=1))) %>%
  select(p.value) %>%
  ungroup()
dat %>%
  filter(p>0) %>%
  group_by(popsize,samplerate=relsr*popsize) %>%
  do(tidy(ks.test(x=.$Lambda,y=pexp,rate=1))) %>%
  select(p.value) %>%
  ungroup()
dat %>%
  filter(p>0) %>%
  group_by(popsize) %>%
  do(tidy(ks.test(x=.$Lambda,y=pexp,rate=1))) %>%
  select(p.value) %>%
  ungroup()
dat %>%
  filter(p>0) %>%
  group_by(relsr) %>%
  do(tidy(ks.test(x=.$Lambda,y=pexp,rate=1))) %>%
  select(p.value) %>%
  ungroup()
dat %>%
  filter(p>0) %>%
  do(tidy(ks.test(x=.$Lambda,y=pexp,rate=1))) %>%
  select(p.value)

Session Info

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8    LC_NUMERIC=C            LC_TIME=en_GB.UTF-8    
##  [4] LC_COLLATE=C            LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
##  [7] LC_PAPER=en_US.UTF-8    LC_NAME=C               LC_ADDRESS=C           
## [10] LC_TELEPHONE=C          LC_MEASUREMENT=C        LC_IDENTIFICATION=C    
## 
## attached base packages:
## [1] parallel  grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] broom_0.7.3        doRNG_1.8.2        rngtools_1.5       doParallel_1.0.16 
##  [5] iterators_1.0.13   foreach_1.5.1      phylopomp_0.0.10.0 cowplot_1.1.1     
##  [9] forcats_0.5.0      stringr_1.4.0      dplyr_1.0.3        purrr_0.3.4       
## [13] readr_1.4.0        tidyr_1.1.2        tibble_3.0.5       ggplot2_3.3.3     
## [17] tidyverse_1.3.0    knitr_1.30        
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2          jsonlite_1.7.2      modelr_0.1.8       
##  [4] assertthat_0.2.1    BiocManager_1.30.10 rvcheck_0.1.8      
##  [7] cellranger_1.1.0    yaml_2.2.1          pillar_1.4.7       
## [10] backports_1.2.1     lattice_0.20-41     glue_1.4.2         
## [13] digest_0.6.27       rvest_0.3.6         colorspace_2.0-0   
## [16] htmltools_0.5.1.1   plyr_1.8.6          pkgconfig_2.0.3    
## [19] haven_2.3.1         pomp_3.2            mvtnorm_1.1-1      
## [22] patchwork_1.1.1     tidytree_0.3.3      scales_1.1.1       
## [25] farver_2.0.3        generics_0.1.0      ellipsis_0.3.1     
## [28] withr_2.4.1         lazyeval_0.2.2      cli_2.2.0          
## [31] magrittr_2.0.1      crayon_1.3.4        readxl_1.3.1       
## [34] evaluate_0.14       ps_1.5.0            fs_1.5.0           
## [37] fansi_0.4.2         nlme_3.1-151        xml2_1.3.2         
## [40] tools_4.0.3         hms_1.0.0           lifecycle_0.2.0    
## [43] aplot_0.0.6         ggtree_2.2.0        munsell_0.5.0      
## [46] reprex_1.0.0        compiler_4.0.3      rlang_0.4.10       
## [49] rstudioapi_0.13     labeling_0.4.2      rmarkdown_2.6      
## [52] gtable_0.3.0        codetools_0.2-18    deSolve_1.28       
## [55] DBI_1.1.1           reshape2_1.4.4      R6_2.5.0           
## [58] lubridate_1.7.9.2   treeio_1.12.0       ape_5.4-1          
## [61] stringi_1.5.3       Rcpp_1.0.6          vctrs_0.3.6        
## [64] dbplyr_2.0.0        tidyselect_1.1.0    xfun_0.20          
## [67] coda_0.19-4

References